#ROC plots for diff models

#for baseline
trainBetaAll = coef(mlogit1)
Xall = data.matrix( cbind(1, indiadata[, names(trainBetaAll)[2:length(trainBetaAll)] ] ) )
predValsAll = Xall %*% trainBetaAll
predProbsAll = 1/( 1 + exp(-predValsAll) )


## ROC Plot
# Choose a threshold
threshold = 0.5

# Classify predictions as zero or one based on threshold
predRocbase = as.numeric( predProbsAll > threshold) #turning predProbs into 0 or 1 vector

# Calculate False positive rate and True positive rate
# FPR: probability to be predicted positive, given that someone is negative
FPRbase = sum( (predRocbase == 1)* (indiadata$statehood==0) ) / sum(indiadata$statehood == 0)

# TPR: probability to be predicted positive, given that someone is positive
TPRbase = sum( (predRocbase == 1)* (indiadata$statehood==1) ) / sum(indiadata$statehood == 1)

# To generate a ROC plot we do this process for multiple threshold values
roc = function(threshold){
  # Set up output matrix
  pr=matrix(NA, ncol=3, nrow=length(threshold), 
            dimnames=list(NULL, c('Threshold', 'FPR', 'TPR') ) )
  
  # Loop through thresholds
  for(ii in 1:length(threshold)){
    predRocbase = as.numeric(predProbsAll >  threshold[ii]) #bad practice to have predProbs defined outside function beforehand
    FPR = sum( (predRocbase == 1)* (indiadata$statehood==0) ) / sum(indiadata$statehood == 0)
    
    # TPR: probability to be predicted positive, given that someone is positive
    TPR = sum( (predRocbase == 1)* (indiadata$statehood==1) ) / sum(indiadata$statehood == 1)
    
    #start inputing into matrix
    pr[ii, 1] = threshold[ii]
    pr[ii, 2] = FPR
    pr[ii, 3] = TPR
  }
  
  
  # Return output
  return( data.frame( pr ) )
}

# Plot roc plot
rocCurve = roc( seq(0, 1, by = .001) )

# Approximating area under the curve
i = 2:nrow(rocCurve)
aucBase = (rocCurve$FPR[i] - rocCurve$FPR[i - 1]) %*% (rocCurve$TPR[i] + rocCurve$TPR[i - 1])/2
aucBase

rocBase = ggplot(rocCurve, aes(x=FPR, y=TPR)) + geom_line(colour="midnightblue",lwd=.8) + geom_abline(intercept = 0, slope =1) #addign in diaganol to see if model does at least as well as coin flip. it doesnt here
#rocBase = rocBase + geom_rect(fill = "grey94")
rocBase = rocBase + ggtitle("ROC Curve: Base Model") + ylab("True Positive Rate") + xlab("False Positive Rate")
rocBase = rocBase + theme_bw()  + theme(
  plot.background = element_blank()
  ,panel.grid.major = element_blank()
  ,panel.grid.minor = element_blank()
  #,panel.border = element_blank()
) 
rocBase

#for spatial
trainBetaspatial = coef(spatialModel2)
Xspatial = data.matrix( cbind(1, indiadata[, names(trainBetaspatial)[2:length(trainBetaspatial)] ] ) )
predValsspatial = Xspatial %*% trainBetaspatial
predProbsspatial = 1/( 1 + exp(-predValsspatial) )


rocSpat = function(threshold){
  # Set up output matrix
  prSpat=matrix(NA, ncol=3, nrow=length(threshold), 
            dimnames=list(NULL, c('Threshold', 'FPR', 'TPR') ) )
  
  # Loop through thresholds
  for(ii in 1:length(threshold)){
    predRocspatial = as.numeric(predProbsspatial >  threshold[ii]) #bad practice to have predProbs defined outside function beforehand
    FPR = sum( (predRocspatial == 1)* (indiadata$statehood==0) ) / sum(indiadata$statehood == 0)
    
    # TPR: probability to be predicted positive, given that someone is positive
    TPR = sum( (predRocspatial == 1)* (indiadata$statehood==1) ) / sum(indiadata$statehood == 1)
    
    #start inputing into matrix
    prSpat[ii, 1] = threshold[ii]
    prSpat[ii, 2] = FPR
    prSpat[ii, 3] = TPR
  }
  
  
  # Return output
  return( data.frame( prSpat ) )
}

# Plot roc plot
rocCurveSpat2 = rocSpat( seq(0, 1, by = .001) )

# Approximating area under the curve
rocSpatplot = ggplot(rocCurveSpat, aes(x=FPR, y=TPR)) + geom_line(colour="midnightblue",lwd=.8) + geom_abline(intercept = 0, slope =1) 
rocSpatplot = rocSpatplot + ggtitle("ROC Curve: Model with Spatial Variable Included")  + ylab("True Positive Rate") + xlab("False Positive Rate")
rocSpatplot = rocSpatplot + theme_bw()  + theme(
  plot.background = element_blank()
  ,panel.grid.major = element_blank()
  ,panel.grid.minor = element_blank()
  #,panel.border = element_blank()
) 
rocSpatplot




rocSpatplot2= ggplot(rocCurveSpat2, aes(x=FPR, y=TPR)) + geom_line(colour="midnightblue",lwd=.8) + geom_abline(intercept = 0, slope =1) 
rocSpatplot2 = rocSpatplot2 + ggtitle("ROC Curve: Spatial Variable, No Interaction")  + ylab("True Positive Rate") + xlab("False Positive Rate")
rocSpatplot2 = rocSpatplot2 + theme_bw()  + theme(
  plot.background = element_blank()
  ,panel.grid.major = element_blank()
  ,panel.grid.minor = element_blank()
  #,panel.border = element_blank()
) 
rocSpatplot2

rocCombined = ggplot(rocCurveSpat, aes(x=FPR, y=TPR)) + geom_line(colour="midnightblue",lwd=.8)  + geom_line(aes(x=FPR, y=TPR), data=rocCurve, colour="#CC79A7", lwd=.8) + geom_abline(intercept = 0, slope =1)  
rocCombined = rocCombined + ggtitle("ROC Curves for Baseline and Extended Models")  + ylab("True Positive Rate") + xlab("False Positive Rate")
rocCombined = rocCombined + theme_bw()  + theme(
  plot.background = element_blank()
  ,panel.grid.major = element_blank()
  ,panel.grid.minor = element_blank()
  #,panel.border = element_blank()
) 
#rocCombined = rocCombined + geom_line(aes(x=FPR, y=TPR), data=rocCurveSpat, colour="red", lwd=.8) guide_legend(title = "test")
rocCombined = rocCombined + scale_colour_discrete(breaks = c(rocCurveSpat2, rocCurve))
rocCombined


multiplot(rocSpatplot, rocSpatplot2, rocBase, cols=2)
rocMatrix = cbind(predProbsspatial, indiadata$statehood)

# Approximating area under the curve -Spatial Model no Interaction
i = 2:nrow(rocCurveSpat2)
auc = (rocCurveSpat2$FPR[i] - rocCurveSpat2$FPR[i - 1]) %*% (rocCurveSpat2$TPR[i] + rocCurveSpat2$TPR[i - 1])/2
auc

#AUC for spatial model , w/interaction
i = 2:nrow(rocCurveSpat)
aucSpat = (rocCurveSpat$FPR[i] - rocCurveSpat$FPR[i - 1]) %*% (rocCurveSpat$TPR[i] + rocCurveSpat$TPR[i - 1])/2
aucSpat
